Leemos los datos del fichero extraído de Kaggle: https://www.kaggle.com/datasets/anandhuh/covid19-in-world-countrieslatest-data?resource=download.
Acto seguido, preparamos los datos para el gráfico deseado.
# Load the data and take a first look (only the 10 most populous countries
# are kept).
# NOTE(review): the absolute path below is specific to one machine; the script
# will not run elsewhere unless the CSV is placed at this exact location.
countries <- read.csv(
  file = "C:/Users/carlo/OneDrive/Documentos/MASTER/4o semestre/Visualización de datos/PEC2/worldwide covid data.csv"
)
# Sort rows by population, largest first, then keep the first ten.
countries <- countries[order(countries$Population, decreasing = TRUE), ]
countries <- head(countries, n = 10)
summary(countries)
## Country.Other Total.Cases Total.Deaths Total.Recovered
## Length:10 Min. : 168362 Min. : 3142 Min. : 141898
## Class :character 1st Qu.: 1633154 1st Qu.: 29434 1st Qu.: 1587706
## Mode :character Median : 5879610 Median : 239761 Median : 5418114
## Mean :18903629 Mean : 311538 Mean :18352255
## 3rd Qu.:27142653 3rd Qu.: 484371 3rd Qu.:26162509
## Max. :82133342 Max. :1013044 Max. :80015081
## Active.Cases Tot.Cases..1M.pop Deaths..1M.pop Total.Tests
## Min. : 2666 Min. : 117 Min. : 3.0 Min. : 4977858
## 1st Qu.: 13602 1st Qu.: 7923 1st Qu.: 143.2 1st Qu.: 18714172
## Median : 50689 Median : 26156 Median : 465.5 Median : 78614352
## Mean : 239836 Mean : 62472 Mean :1237.3 Mean :243996718
## 3rd Qu.: 375485 3rd Qu.:103429 3rd Qu.:2528.0 3rd Qu.:245050000
## Max. :1105217 Max. :245579 Max. :3074.0 Max. :992445691
## Tests..1M.pop Population
## Min. : 23131 Min. :1.313e+08
## 1st Qu.: 113226 1st Qu.:1.795e+08
## Median : 208994 Median :2.219e+08
## Mean : 649530 Mean :4.560e+08
## 3rd Qu.: 508241 3rd Qu.:3.205e+08
## Max. :2967412 Max. :1.439e+09
# Inspect the names of the countries that remain after the selection.
countries[["Country.Other"]]
## [1] "China" "India" "USA" "Indonesia" "Pakistan"
## [6] "Brazil" "Nigeria" "Bangladesh" "Russia" "Mexico"
# Map each country to its continent, creating a categorical variable.
# A named lookup vector replaces the nested ifelse() chain: indexing it by
# country name returns the continent, and any country that is not in the
# table (or an NA name) yields NA.
# NOTE(review): Russia is transcontinental; it is kept as "Asia" here to
# preserve the existing grouping -- confirm this is the intended choice.
continent_by_country <- c(
  China      = "Asia",
  India      = "Asia",
  USA        = "America",
  Indonesia  = "Asia",
  Pakistan   = "Asia",
  Brazil     = "America",
  Nigeria    = "Africa",
  Bangladesh = "Asia",
  Russia     = "Asia",
  Mexico     = "America"
)
# as.character() guards against the column being a factor, which would
# otherwise index the lookup by integer code instead of by name.
countries$Continent <- unname(
  continent_by_country[as.character(countries$Country.Other)]
)
# Convert Continent to a factor so the plot can colour points by continent.
countries[["Continent"]] <- factor(countries[["Continent"]])
# Count how many countries fall into each continent.
summary(countries[["Continent"]])
## Africa America Asia
## 1 3 6
Realizamos la visualización deseada:
# Install ggplot2 only when it is missing, then attach it exactly once.
# requireNamespace() checks availability without attaching the package, so
# library() below is the single place where it is attached (and it fails
# loudly if the installation did not succeed).
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Install plotly only when it is missing, then attach it exactly once.
# requireNamespace() checks availability without attaching the package, so
# the attach messages below are now produced by library(plotly).
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Bubble chart: total tests on the x axis, cases per million inhabitants on
# the y axis, bubble area driven by population and colour by continent.
p <- ggplot(
  data = countries,
  mapping = aes(
    x = Total.Tests,
    y = Tot.Cases..1M.pop,
    size = Population,
    color = Continent
  )
) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(2, 10))
# Render the static ggplot object as an interactive plotly widget.
ggplotly(p)